build dummy dataset.py (1471B)
#!/usr/bin/env python2
"""Build a small synthetic dataset directory for testing.

The generated directory contains five text files: ``entities``,
``relations``, ``train``, ``valid`` and ``test``.  The code runs unchanged
under both Python 2 and Python 3.
"""

from __future__ import print_function
import sys
import os
import shutil
import random

try:
    _range = xrange  # Python 2: keep the lazy range the script always used
except NameError:
    _range = range  # Python 3: range is already lazy

# Dataset layouts understood by construct_dummy_dataset().
_KINDS = ('id', 'halfperm')


def construct_dummy_dataset(kind, prefix, n_entities, n_relations):
    """Create directory *prefix* and fill it with a dummy dataset.

    Files written inside *prefix*:

    * ``entities``  -- one line ``E<i>`` for each i in [0, n_entities).
    * ``relations`` -- one line ``R<i>`` for each i in [0, n_relations).
    * ``train``     -- tab-separated lines
      ``<entity index>\\t<relation index>\\t<entity index>``.
    * ``valid`` and ``test`` -- byte-for-byte copies of ``train``.

    Args:
        kind: ``'id'`` pairs every entity with itself for every relation.
            ``'halfperm'`` pairs, for every relation, each entity of the
            lower half ``[0, n_entities // 2)`` with a distinct entity of the
            upper half, using a fresh random permutation per relation
            (drawn from the global ``random`` module state).
        prefix: Path of the directory to create; it must not exist yet.
        n_entities: Number of entities.
        n_relations: Number of relations.

    Raises:
        ValueError: If *kind* is not one of the supported kinds.  This is
            checked before anything is written, so no partial dataset is
            left behind.
        OSError: If *prefix* cannot be created (e.g. it already exists).
    """
    # Validate first so a typo in `kind` does not leave a half-built
    # directory on disk.
    if kind not in _KINDS:
        raise ValueError('Unknown kind: {0!r}'.format(kind))

    os.mkdir(prefix)

    with open(os.path.join(prefix, 'entities'), 'w') as out:
        for i in _range(n_entities):
            out.write('E{0}\n'.format(i))

    with open(os.path.join(prefix, 'relations'), 'w') as out:
        for i in _range(n_relations):
            out.write('R{0}\n'.format(i))

    # Floor division keeps the Python 2 integer semantics on Python 3.
    half = n_entities // 2
    train_path = os.path.join(prefix, 'train')

    with open(train_path, 'w') as out:
        for r in _range(n_relations):
            if kind == 'id':
                for e in _range(n_entities):
                    out.write('{0}\t{1}\t{2}\n'.format(e, r, e))
            else:  # 'halfperm'
                # A materialised list is required: random.shuffle works
                # in place and cannot shuffle a lazy range object.
                right = list(_range(half))
                random.shuffle(right)
                for e in _range(half):
                    # Offset by `half` so targets fall in the upper half.
                    out.write('{0}\t{1}\t{2}\n'.format(e, r, right[e] + half))

    # Validation and test splits are plain copies of the training split.
    shutil.copyfile(train_path, os.path.join(prefix, 'valid'))
    shutil.copyfile(train_path, os.path.join(prefix, 'test'))


def _main(argv):
    """Parse command-line arguments, build the dataset, return an exit code."""
    if len(argv) < 5 or argv[1] not in _KINDS:
        print('Usage: {0} {{id, halfperm}} dataset_name n_entities n_relations'.format(argv[0]), file=sys.stderr)
        return 1

    construct_dummy_dataset(argv[1], argv[2], int(argv[3]), int(argv[4]))
    return 0


if __name__ == '__main__':
    sys.exit(_main(sys.argv))